## Functions to calculate quantities and produce figures 
##
##
## Kevin Quinn and Guoer Liu
## 11/19/24

if (!require("WeightIt")) {
 devtools::install_version("WeightIt", version = "1.1.0")
} else {
 current_version <- packageVersion("WeightIt")
 if (current_version != "1.1.0") {
   devtools::install_version("WeightIt", version = "1.1.0")
 }
}

 if (!require("Matching")) {
 devtools::install_version("Matching", version = "4.10-14")
} else {
 if (packageVersion("Matching") != "4.10-14") {
   devtools::install_version("Matching", version = "4.10-14")
 }
}

## Function to calculate DFBETAS (Section 3.2)
## 
## DFBETA.weighted() calculates DFBETA values for the coefficient on
##  the treatment indicator in a WLS bivariate regression of outcome
##  on treatment. 
##
## arguments:
##  y:          n x 1 vector of outcomes
##  treatment:  n x 1 vector of binary treatment values (1=T, 0=C)
##  weights:    n x 1 vector of weights
##
## output:
##  DFBETA: n x 1 vector of DFBETA values for the coefficient on treatment
##
## Ref: Li and Valliant (2011) "Linear Regression Influence Diagnostics for
##		Unclustered Survey Data", Journal of Official Statistics.
##

DFBETA.weighted <- function(y, treatment, weights){

    ## minimal error checking
    n <- length(y)
    if (length(treatment) != n){
        stop("y and treatment need to be of same length.")
    }
    if (length(weights) != n){
        stop("y and weights need to be of same length.")
    }
    if (sum(treatment %in% c(0,1)) != n){
        stop("treatment contains values other than 0 or 1")
    }
    
    X <- as.matrix(cbind(1, treatment))    
    W <- diag(weights)
    beta.hat <- solve(t(X) %*% W %*% X) %*% t(X) %*% W %*% y 
    ehat <-  y -  X %*% beta.hat
    A <- t(X) %*% W %*% X
    A.inv <- solve(A)
    DFBETA.mat <- matrix(NA, n, ncol(X))
    for (i in 1:n){
        x.i <- matrix(X[i,], 1, ncol(X))
        h.i <- x.i %*% A.inv %*% t(x.i) * W[i,i]
        DFBETA.mat[i,] <- (A.inv %*% t(x.i) * ehat[i] * W[i,i]) / (1 - h.i[1])
    }
    DFBETA <- DFBETA.mat[,2] 
    return(DFBETA)
}

## TASMD() calculates the target absolute standardized mean difference
## proposed by Chattopadhyay et al. (2020). (Section 3.4.1)
##
## estimand: ATT
##
## arguments: 
##  x:          n x 1 vector of covariate values to check balance on
##  treatment:  n x 1 vector of binary treatment values (1=T, 0=C)
##  weights:    n x 1 vector of weights
##
## output:  a list with the following elements:
##  TASMD.0:    scalar giving TASMD for comparison of control group to
##                target.
##
TASMD <- function(x, treatment, weights){

    w <- weights
    ## minimal error checking
    n <- length(x)
    
    x.bar.target <- mean(x[treatment == 1]) # b/c ATT is the estimand
    
    ## calcuate weighted mean of X in control group
    x.bar.0 <- sum(x[treatment == 0] * w[treatment == 0]) / sum(w[treatment == 0])
    
    ## calculate SD of X in control group
    SD.0 <- sd(x[treatment == 0])

    ## put it together
    TASMD.0 <- abs(x.bar.0 - x.bar.target) / SD.0

    return(TASMD.0)   
}



## TASMD.ATE() calculates the target absolute standardized mean difference
## proposed by Chattopadhyay et al. (2020).
##
## estimand: ATE
##
## arguments: 
##  x:          n x 1 vector of covariate values to check balance on
##
##  treatment:  n x 1 vector of binary treatment values (1=T, 0=C)
##
##  weights:    n x 1 vector of weights
##
##
##  output:  a list with the following elements:
##  TASMD.1:    scalar giving TASMD for comparison of treated group to
##                target.
##
##  TASMD.0:    scalar giving TASMD for comparison of control group to
##                target.
##

TASMD.ATE <- function(x, treatment, weights, pscores=NULL, estimand="ATE"){

    w <- weights
    ## minimal error checking
    n <- length(x)

    x.bar.target <- mean(x) # b/c ATE is the estimand
 
    ## calcuate weighted mean of X in treated group
    x.bar.1 <- sum(x[treatment == 1] * w[treatment == 1]) / sum(w[treatment == 1])
    
    ## calcuate weighted mean of X in control group
    x.bar.0 <- sum(x[treatment == 0] * w[treatment == 0]) / sum(w[treatment == 0])
    
    ## calculate SD of X in treated group
    SD.1 <- sd(x[treatment == 1])

    ## calculate SD of X in control group
    SD.0 <- sd(x[treatment == 0])


    ## put it together
    TASMD.1 <- abs(x.bar.1 - x.bar.target) / SD.1
    TASMD.0 <- abs(x.bar.0 - x.bar.target) / SD.0


    return(list(TASMD.1=TASMD.1, TASMD.0=TASMD.0))
    
}

## Kolmogorov-Smirnov Test Statistic (Section 3.4.2)
##
## wtd.ks() computes and returns the Kolmogorov-Smrinov statistic
## for testing the equality of two weighted distributions.
##
## estimand: ATT
##
## arguments:
##  x:          n x 1 vector (the covariate to be plotted)
##  treatment:  n x 1 vector of binary treatment values (1=T, 0=C)
##  weights:    n x 1 vector of weights
##
## output:
##  D:          the KS-statistic
##
## function taken from fastDR package (author Greg Ridgeway)
##   (variable name changed, and error checking added) 

wtd.ks <- function(x, treatment, weights) {

    w <- weights
    ## minimal error checking
    n <- length(x)
    if (length(treatment) != n){
        stop("x and treatment need to be of same length.")
    }
    if (length(w) != n){
        stop("x and w need to be of same length.")
    }
    if (sum(treatment %in% c(0,1)) != n){
        stop("treatment contains values other than 0 or 1")
    }

    w[treatment == 1] <-  w[treatment == 1]/sum(w[treatment == 1])
    w[treatment == 0] <- -w[treatment == 0]/sum(w[treatment == 0])
    ind <- order(x)
    cumv <- abs(cumsum(w[ind]))
    cumv <- cumv[diff(x[ind]) != 0]

    D <- ifelse(length(cumv) > 0, max(cumv), 0)
    return(D)
}

## wtd.ks.ate() computes and returns the Kolmogorov-Smrinov statistic
## for testing the equality of two weighted distributions.
##
## estimand: ATE
##
## arguments:
##  x:          n x 1 vector (the covariate to be plotted)
##
##  treatment:  n x 1 vector of binary treatment values (1=T, 0=C)
##
##  weights:    n x 1 vector of weights
##
##
## output:
##   KS.weighted.O1
##   KS.weighted.0targ
##   KS.weighted.1targ
##   KS.raw.01
##   KS.raw.0targ
##   KS.raw.1targ
##
## function taken from fastDR package (author Greg Ridgeway)
##   (variable name changed, and error checking added) 

wtd.ks.ate <- function(x, treatment, weights) {
  
  w <- weights
  ## minimal error checking
  n <- length(x)
  if (length(treatment) != n){
    stop("x and treatment need to be of same length.")
  }
  if (length(w) != n){
    stop("x and w need to be of same length.")
  }
  if (sum(treatment %in% c(0,1)) != n){
    stop("treatment contains values other than 0 or 1")
  }
  
  h <- rep(1, n)
  
  # weighted.01
  D.wtd.O1 <- wtd.ks(x, treatment, weights)
  
  # weighted.0targ
  D.wtd.0targ <- wtd.ks(c(x[treatment == 0], x), 
                        c(rep(1, sum(treatment == 0)), rep(0, n)),
                        c(w[treatment == 0], h))
  
  # weighted.1targ
  D.wtd.1targ <- wtd.ks(c(x[treatment == 1], x), 
                        c(rep(1, sum(treatment == 1)), rep(0, n)),
                        c(w[treatment == 1], h))
  
  # raw.01
  D.raw.01 <- wtd.ks(x, treatment, rep(1, n))
  
  # raw.0targ
  D.raw.0targ <- wtd.ks(c(x[treatment == 0], x), 
                        c(rep(1, sum(treatment == 0)), rep(0, n)),
                        c(rep(1, sum(treatment == 0)), h))
  # raw.1targ
  D.raw.1targ <- wtd.ks(c(x[treatment == 1], x), 
                        c(rep(1, sum(treatment == 1)), rep(0, n)),
                        c(rep(1, sum(treatment == 1)), h))
  
  # output
  
  return(list(KS.weighted.O1 = D.wtd.O1,
              KS.weighted.0targ = D.wtd.0targ,
              KS.weighted.1targ = D.wtd.1targ,
              KS.raw.01 = D.raw.01,
              KS.raw.0targ = D.raw.0targ,
              KS.raw.1targ = D.raw.1targ))
}


## wtd.qqplot() constructes a weighted QQ-plot to compared a weighted
##   covariate's distribution between treated and control subgroups 
##   (Section 3.4.2)
##
## arguments:
##  x:          n x 1 vector (the covariate to be plotted)
##  treatment:  n x 1 vector of binary treatment values (1=T, 0=C)
##  weights     n x 1 vector of weights
##  x0lab:      label for the x-axis (quantiles of x[treatment == 0])
##  x1lab:      label for the y-axis (quantiles of x[treatment == 1])
##  line.col:   string giving the color of the 45 degree line
##  main:       string sent to the "main" argument of the plot() function
##  add:        overlay two wtd.qqplot

wtd.qqplot <- function(x, treatment, weights,
                       x0lab="x0", x1lab="x1",
                       line.col="black", 
                       points.pch = 19,
                       points.cex = 1.5,
                       points.lwd = 1,
                       points.col="grey", 
                       points.alpha=1, main="", add=FALSE, legend.label=NULL, legend.pos="topright"){
    library(Hmisc)
    library(scales)  # Needed for alpha function

    ## minimal error checking
    n <- length(x)
    if (length(treatment) != n){
        stop("x and treatment need to be of same length.")
    }
    if (length(weights) != n){
        stop("x and weights need to be of same length.")
    }
    if (sum(treatment %in% c(0,1)) != n){
        stop("treatment contains values other than 0 or 1")
    }
    

    x1 <- x[treatment == 1]
    x0 <- x[treatment == 0]
    w1 <- weights[treatment == 1]
    w0 <- weights[treatment == 0]

    n0 <- length(x0)
    n1 <- length(x1)
    w0sum <- sum(w0)
    w1sum <- sum(w1)
    if (n0 != length(w0)){
        stop("x0 and w0 not of same length.\n")
    }
    if (n1 != length(w1)){
        stop("x1 and w1 not of same length.\n")
    }
    n.min <- min(n0, n1)
    qvals <- seq(from=1/n.min, to=(n.min-1)/n.min, length.out=n.min)
    
    ## deal with negative weights
    if (min(w0) < 0){
        stop("\nw0 contains negative values. Weights must be non-negative.\n")
    }
    if (min(w1) < 0){
        stop("\nw1 contains negative values. Weights must be non-negative.\n")
    }
    w0 <- w0 / w0sum
    w1 <- w1 / w1sum
    q0 <- wtd.quantile(x0, normwt=TRUE, probs=qvals, weights=w0)
    q1 <- wtd.quantile(x1, normwt=TRUE, probs=qvals, weights=w1)

    q.min <- min(c(q0, q1))
    q.max <- max(c(q0, q1))
    
    points.col <- alpha(points.col, points.alpha)

    if (!add) {
        plot(q0, q1, xlab=x0lab, ylab=x1lab,
             xlim=c(q.min, q.max),
             ylim=c(q.min, q.max),
             main=main, col=points.col,
             pch = points.pch, 
             cex = points.cex,
             lwd = points.lwd,
             las=1)  # Set las=1 for horizontal y-axis label
        abline(0, 1, col=line.col, lty = 5, lwd = 0.6)
    } else {
        points(q0, q1, 
               col=points.col, pch = points.pch, cex = points.cex,
               lwd = points.lwd)
    }

    if (!is.null(legend.label) && !add) {
        legend(legend.pos, legend=legend.label, col=alpha(points.col, points.alpha), pch=points.pch, cex=1)
    }
}
